library("dplyr")
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library("igraph")
##
## Attaching package: 'igraph'
## The following objects are masked from 'package:dplyr':
##
## as_data_frame, groups, union
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
library("gprofiler2")
# Load network
if (!exists("modTOM")) {
load("modTOM_step.RData")
}
# Load PPI from IID
dataFirst <- read.csv(file = "PPIs_1st.txt", header = TRUE)
Old removed code:
netFirstH2 <- induced_subgraph(wnet, vids = unlist(neighborhood(netFirst, order = 1, nodes = V(netFirst)[“HDAC2”])), impl = “copy_and_delete”) write_graph(netFirstH2, file = “netfirsth2.graphml”, format = “graphml”)
########## Create co-expression network ##########
net0 <- igraph::graph.adjacency(modTOM, weighted = TRUE, mode = "undirected")
# Remove self loops
net0 <- simplify(net0)
########## Create PPI network ##########
# Create network from 1st interactions
netFirst <- graph_from_data_frame(dataFirst[3:15])
# Get all lncRNA ids
allLnc <-
probes1 %>%
filter(gene_type != "protein_coding") %>%
select(gene_name) %>%
unlist() %>%
unname()
# Get all EWS-FLI1 1st interactor ids
netFirstNodes <- names(V(netFirst))
# Index of net0 nodes to keep
i <- names(V(net0)) %in% c(netFirstNodes, allLnc)
# Subnetwork from net0 (all co-expression)
# Get a subnetwork with nodes from netFirst and allLnc
# Nodes are in PPI plus all lncRNA
netWgcnaPpiFirst0 <- induced_subgraph(net0, vids = i, impl = "copy_and_delete")
# Summary of netWgcnaPpiFirst weights
print("netWgcnaPpiFirst0 weight summary info")
## [1] "netWgcnaPpiFirst0 weight summary info"
print(summary(E(netWgcnaPpiFirst0)$weight))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000049 0.0001523 0.0005636 0.0020555 0.0020125 0.3173470
# Compare to the original net0
print("net0 weight summary info")
## [1] "net0 weight summary info"
print(summary(E(net0)$weight))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000039 0.0002450 0.0010423 0.0039343 0.0039293 0.3180248
# Only get connections above 90%
quantileValue = 0.90
i <- E(netWgcnaPpiFirst0)$weight > unname(quantile(E(netWgcnaPpiFirst0)$weight, quantileValue))
# Create a smaller subnetwork based on cutoff
netWgcnaPpiFirst <- subgraph.edges(netWgcnaPpiFirst0, E(netWgcnaPpiFirst0)[i], delete.vertices = TRUE)
########## Annotate network ##########
tempAnnot <- probes1 %>% filter(gene_name %in% names(V(netWgcnaPpiFirst)))
# Find duplicated itens
tempAnnot$gene_name[duplicated(tempAnnot$gene_name)]
## [1] "GOLGA8M" "HSPA14"
# Check wich gene_id to keep
geneInfo %>% filter(gene_name == c("GOLGA8M", "HSPA14"))
## gene_id gene_name moduleColor
## 1 ENSG00000261480 GOLGA8M black
## 2 ENSG00000284024 HSPA14 darkgreen
## 3 ENSG00000187522 HSPA14 darkmagenta
## 4 ENSG00000188626 GOLGA8M darkmagenta
# We should keep ENSG00000187522 and ENSG00000188626
tempAnnot <- subset(tempAnnot, gene_id != "ENSG00000187522" & gene_id != "ENSG00000188626")
# Match nodes order
tempAnnot <- arrange(tempAnnot, match(gene_name, names(V(netWgcnaPpiFirst))))
# Add node attributes to the network
V(netWgcnaPpiFirst)$gene_id <- tempAnnot$gene_id
V(netWgcnaPpiFirst)$gene_type <- tempAnnot$gene_type
# Save network for inspection
#write_graph(netWgcnaPpiFirst, file = "netWgcnaPpiFirst.graphml", format = "graphml")
There are duplicated items because some gene have the same name for lncRNA and their protein coding version. Here are only two. We can check their modules and keep only the right ones. Alternatively, we could work with ensembl ids from the start.
GO of co-expression module “darkmagenta” after we filter PPI items. Used a custom annotation for gProfiler.
# Already uploaded the first time
upload_GMT_file(gmtfile = "cancer_hdac_genesets.zip")
Your custom annotations ID is gp__ZEow_oHws_ILU You can use this ID as an ‘organism’ name in all the related enrichment tests against this custom source. Just use: gost(my_genes, organism = ’gp__ZEow_oHws_ILU’) [1] “gp__ZEow_oHws_ILU”
geneList <- names(V(netWgcnaPpiFirst))
gostres <- gost(query = geneList,
organism = "gp__ZEow_oHws_ILU", ordered_query = FALSE,
multi_query = FALSE, significant = TRUE, exclude_iea = FALSE,
measure_underrepresentation = FALSE, evcodes = FALSE,
user_threshold = 0.05, correction_method = "g_SCS",
domain_scope = "annotated", custom_bg = NULL,
numeric_ns = "", sources = NULL)
## Detected custom GMT source request
p <- gostplot(gostres, capped = TRUE, interactive = TRUE)
write.csv(apply(gostres$result,2,as.character), file = "gostres_netWgcnaPpiFirst_gp__ZEow_oHws_ILU.tsv", row.names = F)
p
Here we create a network with only HDAC2 interactors.
# Create a network for HDAC2
netWgcnaPpiHDAC2 <- induced.subgraph(netWgcnaPpiFirst, vids = unlist( neighborhood(netWgcnaPpiFirst, order=1, nodes = V(netWgcnaPpiFirst)["HDAC2"]) ) )
netWgcnaPpiHDAC2
## IGRAPH a0010ee UNW- 354 21552 --
## + attr: name (v/c), gene_id (v/c), gene_type (v/c), weight (e/n)
## + edges from a0010ee (vertex names):
## [1] CUL3 --FSCN1 U2AF2 --FSCN1 CUL3 --TOP2B U2AF2 --TOP2B
## [5] CUL3 --HSP90AA1 U2AF2 --CAD CUL3 --CHERP U2AF2 --CHERP
## [9] FSCN1 --CHERP TOP2B --CHERP HSP90AA1--CHERP CAD --CHERP
## [13] U2AF2 --RPL6 CAD --RPL6 CHERP --RPL6 CUL3 --SETD1A
## [17] U2AF2 --SETD1A FSCN1 --SETD1A TOP2B --SETD1A HSP90AA1--SETD1A
## [21] CAD --SETD1A CHERP --SETD1A RPL6 --SETD1A CUL3 --RTCB
## [25] U2AF2 --RTCB FSCN1 --RTCB TOP2B --RTCB HSP90AA1--RTCB
## [29] CAD --RTCB CHERP --RTCB RPL6 --RTCB SETD1A --RTCB
## + ... omitted several edges
summary(E(netWgcnaPpiHDAC2)$weight)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.005502 0.007489 0.010545 0.013691 0.016390 0.125328
# Separate lncRNA and protein_coding for inspection
protein_coding <- names(V(netWgcnaPpiHDAC2)[V(netWgcnaPpiHDAC2)$gene_type == "protein_coding"])
lncRNA <- names(V(netWgcnaPpiHDAC2)[V(netWgcnaPpiHDAC2)$gene_type != "protein_coding"])
# Write results
#write_graph(netWgcnaPpiHDAC2, file = "netWgcnaPpiHDAC2.graphml", format = "graphml")
#write.table(protein_coding, file = "for_RNAct_protein_coding.txt", quote = F, row.names = F, col.names = F)
#write.table(lncRNA, file = "for_RNAct_lncRNA.txt", quote = F, row.names = F, col.names = F)
We filtered the network on weight (previously) and now we apply a betweenness filter due to the large number of conections among nodes. I calculated the percentiles for edge betweenness to choose a threshold to filter the network.
vcount(netWgcnaPpiHDAC2) #354
## [1] 354
ecount(netWgcnaPpiHDAC2) #21552
## [1] 21552
# Calculate edge betweenness
edgeBet <- edge_betweenness(netWgcnaPpiHDAC2, e = E(netWgcnaPpiHDAC2))
plot(quantile(edgeBet, seq(0, 1, 0.01)), main = "Edge Betweenness Percentile", xlab = "Percentile", ylab = "Edge Betweenness")
quantile(edgeBet, seq(0, 1, 0.01))
## 0% 1% 2% 3% 4% 5% 6% 7% 8% 9% 10% 11% 12% 13% 14% 15%
## 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 16% 17% 18% 19% 20% 21% 22% 23% 24% 25% 26% 27% 28% 29% 30% 31%
## 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 32% 33% 34% 35% 36% 37% 38% 39% 40% 41% 42% 43% 44% 45% 46% 47%
## 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1
## 48% 49% 50% 51% 52% 53% 54% 55% 56% 57% 58% 59% 60% 61% 62% 63%
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2
## 64% 65% 66% 67% 68% 69% 70% 71% 72% 73% 74% 75% 76% 77% 78% 79%
## 2 2 2 2 2 2 3 3 3 4 4 4 5 5 6 7
## 80% 81% 82% 83% 84% 85% 86% 87% 88% 89% 90% 91% 92% 93% 94% 95%
## 7 8 9 10 11 12 13 15 16 18 19 21 22 24 26 28
## 96% 97% 98% 99% 100%
## 31 33 37 44 221
# Separate the 0.95 percentile
i <- edgeBet > unname(quantile(edgeBet, 0.95))
# Create a subgraph
netWgcnaPpiHDAC2Clean <- subgraph.edges(netWgcnaPpiHDAC2, E(netWgcnaPpiHDAC2)[i], delete.vertices = TRUE)
# Add edge betweenness
E(netWgcnaPpiHDAC2Clean)$eb <- edge_betweenness(netWgcnaPpiHDAC2Clean, e = E(netWgcnaPpiHDAC2Clean))
# Write results to inspect on Cytoscape
#write_graph(netWgcnaPpiHDAC2Clean, file = "netWgcnaPpiHDAC2Clean.graphml", format = "graphml")
Now we have a network - netWgcnaPpiHDAC2Clean with 353 nodes and 1049 edges, which is better to analyze.
We can extract the neighbors of HDAC2 and their gene_type.
neighHdac2 <- names(unlist(neighborhood(netWgcnaPpiHDAC2Clean, order=1, nodes = V(netWgcnaPpiHDAC2Clean)["HDAC2"])))
resHdac2 <- tempAnnot %>% filter(gene_name %in% neighHdac2)
# Add gene location on the genome
resHdac2 <- resHdac2 %>% left_join(annot0, by = c("gene_id", "gene_name", "gene_type")) %>% select(-X)
# Show results
resHdac2
## gene_id gene_name gene_type seqnames start end
## 1 ENSG00000080824 HSP90AA1 protein_coding chr14 102080738 102139699
## 2 ENSG00000130175 PRKCSH protein_coding chr19 11435288 11450968
## 3 ENSG00000132507 EIF5A protein_coding chr17 7306999 7312463
## 4 ENSG00000157020 SEC13 protein_coding chr3 10293131 10321112
## 5 ENSG00000182944 EWSR1 protein_coding chr22 29268009 29300525
## 6 ENSG00000186019 AC021092.1 antisense chr19 44105463 44113145
## 7 ENSG00000261480 GOLGA8M lincRNA chr15 28719377 28738431
## 8 ENSG00000196591 HDAC2 protein_coding chr6 113933028 114011308
## 9 ENSG00000197728 RPS26 protein_coding chr12 56041351 56044697
## 10 ENSG00000223745 CCDC18-AS1 processed_transcript chr1 93262186 93346025
## 11 ENSG00000225986 UBXN10-AS1 antisense chr1 20184242 20186486
## 12 ENSG00000228323 AC008440.1 antisense chr19 53854581 53869107
## 13 ENSG00000228509 AC006460.1 antisense chr2 190676944 190708716
## 14 ENSG00000228543 AC003684.1 lincRNA chrX 9249920 9275206
## 15 ENSG00000228784 LINC00954 lincRNA chr2 19868860 19885047
## 16 ENSG00000232485 AC098820.1 processed_transcript chr2 216479030 216498761
## 17 ENSG00000233393 AP000688.2 lincRNA chr21 36104881 36109690
## 18 ENSG00000233521 LINC01638 lincRNA chr22 27221349 27224727
## 19 ENSG00000237429 BX293535.1 antisense chr1 27525805 27530561
## 20 ENSG00000238197 PAXBP1-AS1 antisense chr21 32728115 32743122
## 21 ENSG00000241728 AP001062.3 sense_overlapping chr21 44328944 44330221
## 22 ENSG00000243176 AC092944.1 processed_transcript chr3 157175223 157381265
## 23 ENSG00000245522 AC026250.1 lincRNA chr11 9754770 9759533
## 24 ENSG00000245888 NSMCE1-DT antisense chr16 27268205 27290492
## 25 ENSG00000246863 AC012377.1 lincRNA chr15 39921042 39925880
## 26 ENSG00000247679 AC139795.2 antisense chr5 177611253 177619754
## 27 ENSG00000249180 AC020900.1 antisense chr5 96741079 96742698
## 28 ENSG00000249695 AC026369.1 antisense chr12 137411 149169
## 29 ENSG00000249740 OSMR-AS1 lincRNA chr5 38710367 38845829
## 30 ENSG00000250733 C8orf17 TEC chr8 139931172 139933946
## 31 ENSG00000251359 WWC2-AS2 lincRNA chr4 183097017 183099199
## 32 ENSG00000253307 AC011676.1 antisense chr8 141252286 141253292
## 33 ENSG00000255449 AP002812.5 antisense chr11 77866412 77870091
## 34 ENSG00000256139 AC007637.1 sense_overlapping chr12 109111218 109125594
## 35 ENSG00000256262 USP30-AS1 antisense chr12 109052350 109053952
## 36 ENSG00000257048 LINC02417 lincRNA chr12 3367833 3371346
## 37 ENSG00000258458 AL160314.2 processed_transcript chr14 22701476 22766562
## 38 ENSG00000258819 LINC02289 lincRNA chr14 77069180 77076344
## 39 ENSG00000260267 AC026471.1 antisense chr16 31456711 31459736
## 40 ENSG00000260711 AL121839.2 sense_intronic chr14 91752856 91759798
## 41 ENSG00000261118 AC092123.1 antisense chr16 89492017 89504460
## 42 ENSG00000261170 AC009053.3 antisense chr16 74422120 74435254
## 43 ENSG00000262728 AC123768.3 antisense chr15 32586105 32615158
## 44 ENSG00000263164 AC087500.2 lincRNA chr17 5240508 5241543
## 45 ENSG00000265282 AC005828.4 lincRNA chr17 63430468 63432211
## 46 ENSG00000267072 NAGPA-AS1 lincRNA chr16 5010909 5043999
## 47 ENSG00000267250 AC011591.1 lincRNA chr17 68793549 68797822
## 48 ENSG00000267655 AC125437.1 sense_intronic chr18 79117207 79117920
## 49 ENSG00000269053 AC010319.3 antisense chr19 17419305 17419774
## 50 ENSG00000269068 AC009955.4 antisense chr2 219559083 219559626
## 51 ENSG00000269707 AC018730.1 sense_overlapping chr2 104853285 104926052
## 52 ENSG00000270165 AC010530.1 sense_intronic chr16 67738588 67739922
## 53 ENSG00000272273 IER3-AS1 antisense chr6 30742929 30743592
## 54 ENSG00000273674 AC021752.1 lincRNA chr15 50839875 50908599
## 55 ENSG00000273828 AL133227.1 antisense chr20 46364551 46397994
## 56 ENSG00000275481 AC025031.4 lincRNA chr12 46388856 46392126
## 57 ENSG00000276412 AL591926.2 lincRNA chr9 41651658 41654594
## 58 ENSG00000277245 AC084782.3 antisense chr15 56447120 56447697
## 59 ENSG00000278864 AC055811.4 TEC chr17 17181504 17183257
## 60 ENSG00000278935 AC087386.2 TEC chr15 20141794 20146430
## 61 ENSG00000279212 AL390961.3 TEC chr10 26695091 26697625
## 62 ENSG00000279232 AC008522.1 antisense chr5 98792861 98795766
## 63 ENSG00000279970 AC023024.2 TEC chr15 101299656 101301648
## 64 ENSG00000280078 AC016526.3 TEC chr14 76279173 76283103
## 65 ENSG00000285095 AC025887.2 antisense chr18 32470288 32745567
## 66 ENSG00000285407 AL033530.1 lincRNA chr1 68679202 68943452
## 67 ENSG00000286039 AC093849.2 lincRNA chr4 173509633 173511764
## width strand
## 1 58962 -
## 2 15681 +
## 3 5465 +
## 4 27982 -
## 5 32517 +
## 6 7683 -
## 7 19055 -
## 8 78281 -
## 9 3347 +
## 10 83840 -
## 11 2245 -
## 12 14527 -
## 13 31773 -
## 14 25287 +
## 15 16188 +
## 16 19732 -
## 17 4810 +
## 18 3379 -
## 19 4757 +
## 20 15008 +
## 21 1278 -
## 22 206043 +
## 23 4764 -
## 24 22288 +
## 25 4839 +
## 26 8502 +
## 27 1620 -
## 28 11759 -
## 29 135463 -
## 30 2775 +
## 31 2183 -
## 32 1007 -
## 33 3680 -
## 34 14377 +
## 35 1603 -
## 36 3514 +
## 37 65087 -
## 38 7165 -
## 39 3026 -
## 40 6943 -
## 41 12444 -
## 42 13135 +
## 43 29054 -
## 44 1036 -
## 45 1744 -
## 46 33091 +
## 47 4274 -
## 48 714 +
## 49 470 -
## 50 544 +
## 51 72768 +
## 52 1335 -
## 53 664 +
## 54 68725 -
## 55 33444 +
## 56 3271 +
## 57 2937 -
## 58 578 +
## 59 1754 +
## 60 4637 +
## 61 2535 -
## 62 2906 -
## 63 1993 -
## 64 3931 +
## 65 275280 +
## 66 264251 +
## 67 2132 +
# Arrange by edge betweenness and weight
edgebetResHdac2 <- data.frame(as_edgelist(netWgcnaPpiHDAC2Clean)) %>% mutate(eb = E(netWgcnaPpiHDAC2Clean)$eb) %>% filter(X1 == "HDAC2" | X2 == "HDAC2") %>% arrange(desc(eb))
edgeweiResHdac2 <- data.frame(as_edgelist(netWgcnaPpiHDAC2Clean)) %>% mutate(wei = E(netWgcnaPpiHDAC2Clean)$weight) %>% filter(X1 == "HDAC2" | X2 == "HDAC2") %>% arrange(desc(wei))
#write.csv(resHdac2, file = "resHdac2.csv", row.names = F)
#write.csv(edgebetResHdac2, file = "edgebetResHdac2.csv", row.names = F)
#write.csv(edgeweiResHdac2, file = "edgeweiResHdac2.csv", row.names = F)
head(edgebetResHdac2)
## X1 X2 eb
## 1 HDAC2 AP001062.3 994
## 2 HDAC2 AC010319.3 741
## 3 HDAC2 AL390961.3 700
## 4 HDAC2 RPS26 674
## 5 HSP90AA1 HDAC2 610
## 6 EIF5A HDAC2 556
head(edgeweiResHdac2)
## X1 X2 wei
## 1 HDAC2 AC020900.1 0.009754748
## 2 GOLGA8M HDAC2 0.008836550
## 3 AC021092.1 HDAC2 0.008804229
## 4 HDAC2 USP30-AS1 0.008636667
## 5 HDAC2 WWC2-AS2 0.008583435
## 6 HDAC2 AC018730.1 0.007823994
We created a cleaned HDAC2 network that we can analyze with Cytoscape.
sessionInfo()
## R version 4.1.1 (2021-08-10)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.3 LTS
##
## Matrix products: default
## BLAS/LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.8.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=C
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] gprofiler2_0.2.1 igraph_1.2.6 dplyr_1.0.7
##
## loaded via a namespace (and not attached):
## [1] highr_0.9 pillar_1.6.3 compiler_4.1.1 jquerylib_0.1.4
## [5] bitops_1.0-7 tools_4.1.1 digest_0.6.28 viridisLite_0.4.0
## [9] jsonlite_1.7.2 evaluate_0.14 lifecycle_1.0.1 tibble_3.1.5
## [13] gtable_0.3.0 pkgconfig_2.0.3 rlang_0.4.11 DBI_1.1.1
## [17] crosstalk_1.1.1 yaml_2.2.1 xfun_0.26 fastmap_1.1.0
## [21] httr_1.4.2 stringr_1.4.0 knitr_1.36 htmlwidgets_1.5.4
## [25] generics_0.1.0 vctrs_0.3.8 grid_4.1.1 tidyselect_1.1.1
## [29] data.table_1.14.2 glue_1.4.2 R6_2.5.1 fansi_0.5.0
## [33] plotly_4.10.0 rmarkdown_2.11 tidyr_1.1.4 purrr_0.3.4
## [37] ggplot2_3.3.5 magrittr_2.0.1 scales_1.1.1 ellipsis_0.3.2
## [41] htmltools_0.5.2 assertthat_0.2.1 colorspace_2.0-2 utf8_1.2.2
## [45] stringi_1.7.5 RCurl_1.98-1.5 lazyeval_0.2.2 munsell_0.5.0
## [49] crayon_1.4.1